In [ ]:
import pandas as pd
import numpy as np
import plotly.express as px
# Load the Spotify 2023 table, decoding the file as latin-1, and turn the
# 'streams' column into numbers in the same expression. errors='coerce'
# replaces any entry that cannot be parsed as a number with NaN instead of
# raising.
all_data = pd.read_csv("spotify-2023.csv", encoding='latin-1').assign(
    streams=lambda frame: pd.to_numeric(frame['streams'], errors='coerce')
)
In [ ]:
# For every release year, keep the single track with the highest stream count
# and plot it as one bar per year.
#
# Rows with a missing stream count are removed before grouping: if every row of
# a year had NaN streams, idxmax would have no valid row label for that year
# (it yields NaN or raises, depending on the pandas version) and the .loc
# lookup below would fail. Years that still have at least one valid stream
# count are unaffected, because idxmax skips NaN values anyway.
valid_streams = all_data.dropna(subset=['streams'])
top_row_labels = valid_streams.groupby('released_year')['streams'].idxmax()
most_streamed_song_by_year = valid_streams.loc[top_row_labels]

# Only the columns needed for the chart and its hover tooltip.
clean_data = most_streamed_song_by_year[['track_name', 'artist(s)_name', 'released_year', 'streams']]
fig = px.bar(
    clean_data,
    x='released_year',
    y='streams',
    hover_data=['artist(s)_name', 'track_name'],
    title='Most Streamed Songs by Year',
)
fig.show(renderer="notebook")
In [ ]:
# Top three most-streamed tracks for each release year from 2000 onwards,
# drawn as stacked bars coloured by rank within the year.
#
# The selection sorts by year (ascending) and streams (descending) and then
# takes the first three rows of each year. This replaces
# groupby(...).apply(nlargest) + reset_index(drop=True), which depends on the
# grouping column being passed into the applied function; pandas has deprecated
# that, and without it 'released_year' would be missing from the result.
# Missing stream counts sort last within a year, so they are only picked when a
# year has fewer than three valid rows.
top_songs_by_year = (
    all_data[['track_name', 'artist(s)_name', 'released_year', 'streams']]
    .query("""released_year >= 2000""")
    .sort_values(['released_year', 'streams'], ascending=[True, False])
    .groupby('released_year')
    .head(3)
    .reset_index(drop=True)
)
custom_colors = ['#193737' , '#FDF4E3' , '#308446']

# Rank (1 = most streamed) is stored as a string so plotly treats it as a
# category and applies the discrete palette above instead of a continuous
# colour scale. It lives in a separate plotting frame so top_songs_by_year
# keeps exactly its four columns.
rank_order = ['1', '2', '3']
ranked_for_plot = top_songs_by_year.assign(
    rank=(top_songs_by_year.groupby('released_year').cumcount() + 1).astype(str)
)
fig = px.bar(
    ranked_for_plot,
    x='released_year',
    y='streams',
    color='rank',
    color_discrete_sequence=custom_colors,
    category_orders={'rank': rank_order},  # fixes which colour each rank gets
    hover_data=['track_name', 'artist(s)_name', 'released_year', 'streams'],
    text='track_name',
    title='Top 3 streamed songs by year',
)
fig.show(renderer="notebook")
In [ ]:
# Total streams per release year, restricted to years 2010 and later.
# The result is a two-column frame (released_year, streams) with a fresh
# default index, one row per year.
data = (
    all_data[['released_year', 'streams']]
    .query("released_year >= 2010")
    .groupby('released_year', as_index=False)['streams']
    .sum()
)
In [ ]:
# Bar chart of the per-year stream totals held in `data`, with friendlier
# axis titles than the raw column names.
axis_titles = {'released_year': "Year", 'streams': "Total_Streams"}
fig = px.bar(
    data,
    x='released_year',
    y='streams',
    labels=axis_titles,
    title='Streams By Year',
)
fig.show(renderer="notebook")
In [ ]: